## calculate all reference values that we need in Overleaf
## we need the referenecs on EA11 

import pandas as pd
import pyodbc

path_data = r'P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities\Final datasets'
path_charts = r'P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities\Final charts\Summary statistics'

ea11 = ['AT','BE','DE','ES','FI','FR','GR','IE','IT','NL','PT']
# store all values in a list
vals = []
# load data
dbtr_lvl = pd.read_stata(path_data + r'\dbtr_lvl_dt_f.dta')
dbtr_lvl.drop(dbtr_lvl[dbtr_lvl.nace_code == ''].index, inplace=True)
instr_lvl = pd.read_stata(path_data + r'\instr_lvl_dt_f.dta')
instr_lvl.drop(instr_lvl[instr_lvl.nace_code == ''].index, inplace=True)
instr_lvl.drop(instr_lvl[instr_lvl.outst_amnt_byinstr.isna() == True].index, inplace=True)
AC_final = pd.read_stata(path_data + r'\dbtr_combined_dt.dta')
instr_lvl_mb = pd.read_stata(path_data + r'\instr_lvl_mb_dt_f.dta')
instr_lvl_mb.drop(instr_lvl_mb[instr_lvl_mb.nace_code == ''].index, inplace=True)


## Section 2.1
# estimate EA11 ONA out of EA19 ONA
n1 = dbtr_lvl[dbtr_lvl.cntry_dbtr.isin(ea11)]['ona_dbtr'].sum()/dbtr_lvl['ona_dbtr'].sum()
vals.append('{ea11_perc}{'+str(round(n1*100,0))+'\%}')

# loans granted in EUR of all in 2019
# this will be estimated using the source AC table
sql_conn = pyodbc.connect('DSN=DISC DP Impala 64bit',autocommit=True)

query = """
 SELECT  sum(otstndng_nmnl_amnt_cv_c) as ALL_SUM
 FROM (
 SELECT  *,
         MAX(CASE WHEN default_aggr_dbtr_lvl = -99 AND default_dbtr_lvl = -99 THEN -99
                  WHEN (default_aggr_dbtr_lvl = 0 AND default_dbtr_lvl = -99) OR
                   (default_aggr_dbtr_lvl = 0 AND default_dbtr_lvl = 0) OR
                   (default_aggr_dbtr_lvl = -99 AND default_dbtr_lvl = 0) THEN 0
                   ELSE 1 END) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS dflt_stts_debtor
 FROM (
 SELECT  t2.cntry_dbtr,
         t1.obsrvd_agnt_id,
         t1.dbtr_id,
         t1.crdtr_id,
         t1.cntrct_id,
         t1.instrmnt_id, 
         MAX(CASE WHEN t1.dflt_stts_instrmnt IN (18,19,20) THEN 1   
                      WHEN t1.dflt_stts_instrmnt IN (14) THEN 0 
                      ELSE t1.dflt_stts_instrmnt END) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS default_aggr_dbtr_lvl,
         MAX(CASE WHEN t1.dflt_stts_dbtr IN (18,19,20) THEN 1
                      WHEN t1.dflt_stts_dbtr IN (14) THEN 0 
                      ELSE t1.dflt_stts_dbtr END) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS default_dbtr_lvl,
         CASE WHEN t1.otstndng_nmnl_amnt_cv <= 0 OR t1.otstndng_nmnl_amnt_cv >= 300000000 THEN NULL ELSE otstndng_nmnl_amnt_cv END AS otstndng_nmnl_amnt_cv_c
 FROM
        crp_anacredit.anacredit_dm_instrmnt_fct_cc t1
 INNER JOIN
        (SELECT DISTINCT entty_riad_id, dt_rfrnc, cntry as cntry_dbtr
        FROM crp_anacredit.anacredit_dm_d_dbtr_cc cc
        WHERE instttnl_sctr = 'S11' AND
              SUBSTR(ecnmc_actvty,1,2) NOT IN ('64','65','66') AND
              cntry in ('AT','BE','CY','DE','EE','ES','FI','FR','GR','IE','IT','LT','LU','LV','MT','NL','PT','SI','SK') AND
              dt_rfrnc = 201912 AND
              entty_riad_cd = entty_riad_cd_le) t2
 ON t1.dbtr_id = t2.entty_riad_id AND t1.dt_rfrnc = t2.dt_rfrnc

 WHERE 
       t1.typ_instrmnt NOT IN (1000, 1003, -99) AND  
       t1.nmbr_crdtr = 1 AND
       t1.syndctd_cntrct_id IS NULL AND
       t1.prjct_fnnc_ln != 1
       ) k1
       ) k2
WHERE  dflt_stts_debtor = 0
"""

all_sum = pd.read_sql(query, sql_conn)
sql_conn.close()

eur_sum = dbtr_lvl['ona_dbtr'].sum()
eur_shr = eur_sum/all_sum
n2 = eur_shr.iloc[0,0]
vals.append('{eur_perc}{'+str(round(n2*100,0))+'\%}')

del eur_shr

# final sample firms and banks
# # use the main bank data set
# only EA11
nuniq = instr_lvl_mb[(instr_lvl_mb.cntry_dbtr.isin(ea11) == True)].nunique()
n3 = nuniq.loc['dbtr_id']
n4 = nuniq.loc['crdtr_id']
vals.append('{nmbr_dbtrs}{'+str(n3)+'}')
vals.append('{nmbr_crdtrs}{'+str(n4)+'}')

del nuniq

## Appendix B
# percentage of firms classified
# all firms
sql_conn = pyodbc.connect('DSN=DISC DP Impala 64bit',autocommit=True)

query = """

select count(entty_riad_id) as nfirms
from lab_org_dgr_fir.ac_dbtr_201912_kk

"""

nfirms = pd.read_sql(query, sql_conn)
sql_conn.close()

# classified
n5 = AC_final.shape[0]/nfirms.loc[0,'nfirms']
vals.append('{classified_perc}{'+str(round(n5*100,0))+'\%}')

del nfirms

## Section 2.4
# mean values of interest rates by size class and type of instrument
# apply the treatment for the plots
instr_lvl_c = instr_lvl.copy()
instr_lvl_c.rename(columns={'cntry_dbtr':'CNTRY', 'typ_instr_corr':'TYP_INSTR_CORR', 'wir_byloantype':'W_RT_BYTYPE'}, inplace=True)
instr_lvl_c.drop(instr_lvl_c[instr_lvl_c.W_RT_BYTYPE.isna() == True].index, inplace=True)
instr_lvl_c.drop(instr_lvl_c[instr_lvl_c.CNTRY.isin(ea11) == False].index, inplace=True)

# construct debtor level dataset
dt1 = instr_lvl_c.copy()
dt1['wir_ona'] = dt1['ona_intrate_c_ttl'] * dt1['W_RT_BYTYPE']
gr1 = dt1.groupby(['CNTRY','dbtr_id'])['wir_ona'].sum().reset_index()
gr1.rename(columns={'wir_ona':'sum_wir_ona'}, inplace=True)
gr2 = dt1.groupby(['CNTRY','dbtr_id'])['ona_intrate_c_ttl'].sum().reset_index()
gr2.rename(columns={'ona_intrate_c_ttl':'sum_ona'}, inplace=True)

dt1 = dt1.merge(gr1, how='left', on=['CNTRY','dbtr_id'])
dt1 = dt1.merge(gr2, how='left', on=['CNTRY','dbtr_id'])

dt1['wir_dbtr'] = dt1['sum_wir_ona']/dt1['sum_ona']
AC_final_bydbtr = dt1[['CNTRY', 'dbtr_id', 'sz_f', 'nace_code','wir_dbtr']].drop_duplicates()
AC_final_bydbtr.isna().sum()
AC_final_bydbtr.rename(columns={'wir_dbtr':'W_RT_BYDBTR'}, inplace=True)

map_size = {1:'Large', 2:'Medium', 3:'Small', 4:'Micro'}
AC_final_bydbtr['sz_class'] = AC_final_bydbtr['sz_f'].map(map_size)

# mean values
sz_descr = AC_final_bydbtr.groupby(['sz_class'])['W_RT_BYDBTR'].describe()
loan_descr = instr_lvl_c.groupby(['TYP_INSTR_CORR'])['W_RT_BYTYPE'].describe()
n6 = sz_descr.loc['Large','mean']
n7 = sz_descr.loc['Medium','mean']
n8 = sz_descr.loc['Small','mean']
n9 = sz_descr.loc['Micro','mean']
n10 = loan_descr.loc['Credit lines','mean']
n11 = loan_descr.loc['Finance leases','mean']
n12 = loan_descr.loc['Loans','mean']
n13 = loan_descr.loc['Revolving credit','mean']
n14 = loan_descr.loc['Trade receivables','mean']

vals.append('{wir_mean_large}{'+str(round(n6*100,2))+'\%}')
vals.append('{wir_mean_medium}{'+str(round(n7*100,2))+'\%}')
vals.append('{wir_mean_small}{'+str(round(n8*100,2))+'\%}')
vals.append('{wir_mean_micro}{'+str(round(n9*100,2))+'\%}')
vals.append('{wir_mean_CL}{'+str(round(n10*100,2))+'\%}')
vals.append('{wir_mean_FL}{'+str(round(n11*100,2))+'\%}')
vals.append('{wir_mean_L}{'+str(round(n12*100,2))+'\%}')
vals.append('{wir_mean_RC}{'+str(round(n13*100,2))+'\%}')
vals.append('{wir_mean_TR}{'+str(round(n14*100,2))+'\%}')

del instr_lvl_c, dt1, gr1, gr2, AC_final_bydbtr, sz_descr, loan_descr

## Section 5
# avg maturity for FR, AT, FI, BE and GR
# get the maturity for all countries by size
# apply the treatment for the plots
# load data
# drop redundant columns
AC_bytype = instr_lvl.copy()
AC_bytype.drop(columns=['prtctn_amnt_byinstr', 'ona_intrate_c_ttl','ona_pddbtr_c_ttl', 'wir_byloantype', 'pddbtr_byloantype'], inplace=True)
# get final datasets
# instrument level
AC_final_bytype = AC_bytype[AC_bytype.mtrty_byloantype.isna() == False].copy()
# debtor level (weighted)
# debtor dataset should include only credit lines and loans
ona_dbtr = AC_final_bytype[AC_final_bytype.typ_instr_corr.isin(['Loans','Credit lines'])].groupby(['cntry_dbtr','dbtr_id'])['ona_orgnlmtrty_c_ttl'].sum().reset_index()
# there are around 300k debtors that do not have loans or credit lines
ac_dbtr = AC_final_bytype[['cntry_dbtr','dbtr_id','sz_f','nace_code','typ_instr_corr','ona_orgnlmtrty_c_ttl','mtrty_byloantype']].copy()
ac_dbtr['onaxmtr'] = ac_dbtr['ona_orgnlmtrty_c_ttl']*ac_dbtr['mtrty_byloantype']
mtr_dbtr = ac_dbtr[ac_dbtr.typ_instr_corr.isin(['Loans','Credit lines'])].groupby(['cntry_dbtr','dbtr_id'])['onaxmtr'].sum().reset_index()
merge_dbtr = ona_dbtr.merge(mtr_dbtr, how='inner', on=['cntry_dbtr','dbtr_id'])
merge_dbtr['mtrty_dbtr'] = merge_dbtr['onaxmtr']/merge_dbtr['ona_orgnlmtrty_c_ttl']
AC_final_bydbtr = ac_dbtr[['cntry_dbtr','dbtr_id','sz_f','nace_code']].drop_duplicates().merge(merge_dbtr[['cntry_dbtr','dbtr_id','mtrty_dbtr']], how='inner',on=['cntry_dbtr','dbtr_id'])
# drop missing nace codes
AC_final_bytype.drop(AC_final_bytype[AC_final_bytype.nace_code == ''].index, inplace=True)
AC_final_bydbtr.drop(AC_final_bydbtr[AC_final_bydbtr.nace_code == ''].index, inplace=True)

# mean values for large firms
sz_descr = AC_final_bydbtr.groupby(['cntry_dbtr','sz_f'])['mtrty_dbtr'].describe()
n15 = sz_descr.loc[('NL', 1.0),'mean']
n16 = sz_descr.loc[('DE', 1.0),'mean']
n17 = sz_descr.loc[('FR', 1.0),'mean']
n18 = sz_descr.loc[('AT', 1.0),'mean']
n19 = sz_descr.loc[('FI', 1.0),'mean']
n20 = sz_descr.loc[('BE', 1.0),'mean']
n21 = sz_descr.loc[('GR', 1.0),'mean']
n22 = sz_descr.loc[('ES', 1.0),'mean']
n23 = sz_descr.loc[('PT', 1.0),'mean']
n24 = sz_descr.loc[('IT', 1.0),'mean']
n25 = sz_descr.loc[('IE', 1.0),'mean']

vals.append('{wmtrty_mean_NL}{'+str(round(n15,1))+'}')
vals.append('{wmtrty_mean_DE}{'+str(round(n16,1))+'}')
vals.append('{wmtrty_mean_FR}{'+str(round(n17,1))+'}')
vals.append('{wmtrty_mean_AT}{'+str(round(n18,1))+'}')
vals.append('{wmtrty_mean_FI}{'+str(round(n19,1))+'}')
vals.append('{wmtrty_mean_BE}{'+str(round(n20,1))+'}')
vals.append('{wmtrty_mean_GR}{'+str(round(n21,1))+'}')
vals.append('{wmtrty_mean_ES}{'+str(round(n22,1))+'}')
vals.append('{wmtrty_mean_PT}{'+str(round(n23,1))+'}')
vals.append('{wmtrty_mean_IT}{'+str(round(n24,1))+'}')
vals.append('{wmtrty_mean_IE}{'+str(round(n25,1))+'}')

# comparison of Italian to German firms
n26 = (sz_descr.loc[('DE', 1.0),'mean'] - sz_descr.loc[('IT', 1.0),'mean'])/sz_descr.loc[('DE', 1.0),'mean']
vals.append('{wmtrty_mean_IT_DE}{'+str(round(n26*100,0))+'\%}')

del AC_bytype, AC_final_bytype, ona_dbtr, ac_dbtr, mtr_dbtr, merge_dbtr, AC_final_bydbtr, sz_descr

## Section 6.1
# interest rates by firm size
instr_lvl_c = instr_lvl.copy()
instr_lvl_c.rename(columns={'cntry_dbtr':'CNTRY', 'typ_instr_corr':'TYP_INSTR_CORR', 'wir_byloantype':'W_RT_BYTYPE'}, inplace=True)
instr_lvl_c.drop(instr_lvl_c[instr_lvl_c.W_RT_BYTYPE.isna() == True].index, inplace=True)

# construct debtor level dataset
dt1 = instr_lvl_c.copy()
dt1['wir_ona'] = dt1['ona_intrate_c_ttl'] * dt1['W_RT_BYTYPE']
gr1 = dt1.groupby(['CNTRY','dbtr_id'])['wir_ona'].sum().reset_index()
gr1.rename(columns={'wir_ona':'sum_wir_ona'}, inplace=True)
gr2 = dt1.groupby(['CNTRY','dbtr_id'])['ona_intrate_c_ttl'].sum().reset_index()
gr2.rename(columns={'ona_intrate_c_ttl':'sum_ona'}, inplace=True)

dt1 = dt1.merge(gr1, how='left', on=['CNTRY','dbtr_id'])
dt1 = dt1.merge(gr2, how='left', on=['CNTRY','dbtr_id'])

dt1['wir_dbtr'] = dt1['sum_wir_ona']/dt1['sum_ona']
AC_final_bydbtr = dt1[['CNTRY', 'dbtr_id', 'sz_f', 'nace_code','wir_dbtr']].drop_duplicates()
AC_final_bydbtr.isna().sum()
AC_final_bydbtr.rename(columns={'wir_dbtr':'W_RT_BYDBTR'}, inplace=True)

# mean values for large firms
sz_descr = AC_final_bydbtr.groupby(['CNTRY','sz_f'])['W_RT_BYDBTR'].describe()

n27 = sz_descr.loc[('GR', 1.0),'mean']
n28 = sz_descr.loc[('IE', 1.0),'mean']
n29 = sz_descr.loc[('NL', 1.0),'mean']
n30 = sz_descr.loc[('IE', 1.0),'75%']
n31 = sz_descr.loc[('IE', 1.0),'25%']
n32 = sz_descr.loc[('PT', 1.0),'mean']
n33 = sz_descr.loc[('IT', 1.0),'mean']
n34 = sz_descr.loc[('DE', 1.0),'mean']
n35 = sz_descr.loc[('FR', 1.0),'mean']
n36 = sz_descr.loc[('ES', 1.0),'mean']
n37 = sz_descr.loc[('BE', 1.0),'mean']
n38 = sz_descr.loc[('AT', 1.0),'mean']
n39 = sz_descr.loc[('FI', 1.0),'mean']

vals.append('{wir_mean_GR}{'+str(round(n27*100,1))+'\%}')
vals.append('{wir_mean_IE}{'+str(round(n28*100,1))+'\%}')
vals.append('{wir_mean_NL}{'+str(round(n29*100,1))+'\%}')
vals.append('{wir_p75_IE}{'+str(round(n30*100,1))+'\%}')
vals.append('{wir_p25_IE}{'+str(round(n31*100,1))+'\%}')
vals.append('{wir_mean_PT}{'+str(round(n32*100,1))+'\%}')
vals.append('{wir_mean_IT}{'+str(round(n33*100,1))+'\%}')
vals.append('{wir_mean_DE}{'+str(round(n34*100,1))+'\%}')
vals.append('{wir_mean_FR}{'+str(round(n35*100,1))+'\%}')
vals.append('{wir_mean_ES}{'+str(round(n36*100,1))+'\%}')
vals.append('{wir_mean_BE}{'+str(round(n37*100,1))+'\%}')
vals.append('{wir_mean_AT}{'+str(round(n38*100,1))+'\%}')
vals.append('{wir_mean_FI}{'+str(round(n39*100,1))+'\%}')

del instr_lvl_c, dt1, gr2, AC_final_bydbtr, sz_descr


# include \numref for the file in Overleaf
vals_f = []
for i in vals:    
    vals_f.append('\\numref'+i)

# save in a text file
with open(path_charts + r'\dynamicrefs.txt', 'w') as f:
    for i in vals_f:
        f.write(i)
        f.write('\n')







